#-*-coding:utf8-*-
#!/usr/bin/python
#  2016.12.26  download images from gamersky.com

import requests
import os

# Base URL of the site being scraped.
home = 'http://www.gamersky.com/'
# Prefix put directly in front of the "img/<label>/<page>" output path;
# left empty, images land under ./img relative to the working directory.
root = ''

def find_str_between(l, s_s, s_e):
    """Return the part of *l* that lies between two marker strings.

    Args:
        l: The string to search.
        s_s: Start marker. Extraction begins right after its first
            occurrence in *l*.
        s_e: End marker. Extraction stops just before its first occurrence
            after the start marker. An empty string means "take everything
            up to the end of *l*".

    Returns:
        The extracted substring. ``""`` if *s_s* does not occur in *l*.
        If *s_e* is non-empty but does not occur after *s_s*, everything
        after *s_s* is returned.
    """
    start = l.find(s_s)
    if start == -1:
        # Without this check the -1 from find() would be added to len(s_s)
        # and the slice would start at an unrelated offset.
        return ""

    rest = l[start + len(s_s):]
    if s_e == "":
        return rest

    end = rest.find(s_e)
    if end == -1:
        # rest[:-1] would silently drop the last character; keep it all.
        return rest
    return rest[:end]

def saveImg(img, s_path, s):
    """Download one image into a directory unless it is already there.

    Args:
        img: URL of the image. Its last path component becomes the local
            file name.
        s_path: Destination directory; created when it does not exist.
        s: Object with a requests-style ``get(url)`` method (the callers in
            this file pass a ``requests`` session).

    An image counts as already present when the target file exists or, for a
    ``.jpg`` target, when a ``.png`` file with the same base name exists in
    the same directory.

    Download or write failures are reported on stdout and not re-raised, so
    a single bad image does not abort the caller's loop.
    """
    if not os.path.exists(s_path):
        os.makedirs(s_path)

    name = os.path.basename(img)
    target = s_path + "/" + name

    # Swap only the extension: a blanket replace("jpg", "png") would also
    # rewrite "jpg" inside directory names or the base name and then check
    # a path that was never meant.
    stem, ext = os.path.splitext(target)
    png_twin = stem + ".png" if ext == ".jpg" else target

    if os.path.exists(target) or os.path.exists(png_twin):
        return

    try:
        resp = s.get(img)
        # Do not store an HTTP error page under an image file name.
        resp.raise_for_status()
        with open(target, "wb") as out:
            out.write(resp.content)
    except (requests.RequestException, OSError) as err:
        print("保存文件出错！ %s (%s)" % (img, err))
def get_ls(i_id):
    """Fetch one listing page and download the images of every entry on it.

    POSTs to the LabelJsonpAjax.aspx endpoint with a ``jsondata`` payload
    whose ``page`` field is *i_id*, scans the response for tags carrying
    both ``href="..."`` and ``alt="..."``, prints each link with its alt
    text, and passes the link to ``downImgs``.

    Args:
        i_id: Page number to request; formatted with ``%d``, so it must be
            an integer.

    Network errors raised by ``requests`` are not caught here.
    """
    url = "http://db2.gamersky.com/LabelJsonpAjax.aspx"
    headers = {
        "Host": "db2.gamersky.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0",
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
        "Referer": "http://www.gamersky.com/ent/wp/",
        "Connection": "keep-alive"
    }
    # Hard-coded cookie values; presumably captured from a browser session
    # and possibly stale -- TODO confirm whether the endpoint needs them.
    cookies = {
        "Hm_lvt_dcb5060fba0123ff56d253331f28db6a": "1494900307",
        "Hm_lpvt_dcb5060fba0123ff56d253331f28db6a": "1494900753",
        "UM_distinctid": "15c0f00ea41c6-05936b93519cb18-4c322a79-100200-15c0f00ea426b",
        "ASP.NET_SessionId": "a0hho2gu1c25d3lw5k51pktq"
    }
    data = {
        'callback': 'jQuery183032042816298503374_1494900496446',
        'jsondata': '{"type":"updatenodelabel","isCache":true,"cacheTime":60,"nodeId":"20117","isNodeId":"true","page":%d}' % i_id,
        '_': '1494901211760'
    }

    # The context manager closes the session's connections once the
    # response body has been read.
    with requests.session() as con:
        con.headers.update(headers)
        con.cookies.update(cookies)
        text = con.post(url, data).text

    # Put every tag on its own line so one line holds at most one link.
    for line in text.replace("><", ">\n<").split("\n"):
        # Strip backslashes (presumably the markup arrives escaped inside
        # the JSONP payload) so attributes read as href="...".
        line = line.replace("\\", "")
        # Require the exact markers the extraction below searches for;
        # otherwise a garbage URL would be handed to downImgs.
        if 'href="' not in line or 'alt="' not in line:
            continue
        s_h = find_str_between(line, 'href="', '"')
        s_a = find_str_between(line, 'alt="', '"')
        print("%s\t%s" % (s_h, s_a))
        downImgs(s_h)

def _save_page_imgs(html, path, con):
    """Save every image linked from one page's HTML into *path*."""
    for line in html.split("\n"):
        if not ("<p" in line and "<a" in line
                and 'target="_blank"' in line and ".jpg" in line):
            continue
        if 'href="' not in line:
            continue
        href = find_str_between(line, 'href="', '"')
        # The text after the first '?' is used as the image URL; a link
        # without '?' is used as-is.
        _, sep, tail = href.partition("?")
        img_url = tail if sep else href
        if img_url:
            saveImg(img_url, path, con)


def downImgs(url):
    """Download the images of one article, following its page links.

    The article at *url* is fetched and every line containing *url* is
    searched for ``href="..."`` links. Each distinct link is fetched in
    turn and the images found on it are stored under
    ``<root>img/<parent-dir-of-url>/<file-name-without-.shtml>``.

    Args:
        url: Address of the article's first page.

    Network errors raised by ``requests`` are not caught here.
    """
    parent, fname = os.path.split(url)
    lab = os.path.basename(parent)
    lab2 = fname.replace(".shtml", "")
    path = "%simg/%s/%s" % (root, lab, lab2)

    seen = set()
    # The context manager closes the session's connections when done.
    with requests.session() as con:
        for line in con.get(url).text.split("\n"):
            if url not in line:
                continue
            for chunk in line.split("</a>"):
                # A fragment without a link would otherwise yield a garbage
                # "URL" that requests refuses to fetch.
                if 'href="' not in chunk:
                    continue
                page_url = find_str_between(chunk, 'href="', '"')
                # The same href can occur more than once on the line;
                # fetch each page only once.
                if page_url == "" or page_url in seen:
                    continue
                seen.add(page_url)
                print("\t%s" % page_url)
                _save_page_imgs(con.get(page_url).text, path, con)

if __name__ == "__main__":
    # Crawl listing pages 1 and 2, then report completion.
    for page_no in (1, 2):
        get_ls(page_no)

    print("完成")
